# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# 建立者:        张宏达  
# Name:         lagou
# Description:  Fetches job listings from the m.lagou.com search endpoint and appends them to lagou00.txt
# Author:       HONGDA
# Date:         2019/4/24
#-------------------------------------------------------------------------------

import requests
from lxml import etree
import time
import random


def coo_regular(cookie):
    """Convert a raw ``Cookie`` header string into a dict.

    The string is split on ``;`` and each segment on its first ``=``, so a
    value may itself contain ``=``. Names and values are stripped of
    surrounding whitespace and single quotes are removed from values.

    Segments that contain no ``=`` (an empty string, the empty piece left by
    a trailing ``;``, or a bare flag) are skipped instead of raising
    ``ValueError``.

    Args:
        cookie: Cookie string such as ``"a=1; b=2"``.

    Returns:
        dict mapping cookie names to values. When a name occurs more than
        once the last occurrence wins.
    """
    coo = {}
    for k_v in cookie.split(';'):
        if '=' not in k_v:
            # Nothing to unpack here; unpacking would raise ValueError.
            continue
        k, v = k_v.split('=', 1)
        coo[k.strip()] = v.replace("'", '').strip()
    return coo

def csv_write(item):
    """Append one record to ``lagou00.txt`` (GBK-encoded, append mode).

    Args:
        item: Either a string, which is appended verbatim, or a list/tuple
            of field values, which is appended as a single CSV row using the
            ``csv`` module's default dialect (``None`` fields become empty).

    Errors raised while writing (for example a character that GBK cannot
    encode) are reported on stdout and not re-raised, so one bad record does
    not stop the caller. Errors raised while opening the file propagate.
    """
    # Imported locally so this function does not rely on the file's import
    # block providing ``csv``.
    import csv

    # newline='' lets the csv writer control line endings itself.
    with open('lagou00.txt', 'a', encoding='gbk', newline='') as f:
        try:
            if isinstance(item, str):
                f.write(item)
            else:
                # f.write() only accepts str; a row of fields has to go
                # through the csv writer to be quoted and joined.
                csv.writer(f).writerow(item)
        except Exception as e:
            print('write error! ', e)


def _xpath_text(sel, path):
    """Return the stripped text produced by ``path`` on ``sel``, or ``''``.

    Handles both XPath expressions that return a list (the first entry is
    used) and ones that return a single string, such as ``string(...)``.
    A missing tree, an empty result or an XPath error all yield ``''``.
    """
    if sel is None:
        return ''
    try:
        found = sel.xpath(path)
    except etree.XPathError:
        return ''
    if isinstance(found, list):
        if not found:
            return ''
        found = found[0]
    return str(found).strip()


def spider(list_url):
    """Fetch one page of search results and record every job in it.

    The request is sent with the module-level ``headers`` and ``params``
    (they are read as globals, not passed in). For each entry of the
    result list the job's detail page is downloaded, a few fields are
    extracted from it, and one row is handed to ``csv_write``:
    position, company, education, publish time, salary, city, detail URL,
    benefits, description.

    Args:
        list_url: URL of the JSON search endpoint.

    Returns:
        None. Progress and failures are printed to stdout.
    """
    r = requests.get(list_url, headers=headers, params=params)
    try:
        res = r.json()
    except ValueError:
        # The body is not JSON; print it so the actual reply can be inspected.
        print(r.text)
        return

    if not isinstance(res, dict) or 'content' not in res:
        print('not exist')
        return
    try:
        data = res['content']['data']['page']['result']
    except (KeyError, TypeError):
        # 'content' is present but does not have the expected nesting.
        print('not exist')
        return

    for comp in data or []:
        com_detail_url = 'http://m.lagou.com/jobs/' + str(comp['positionId']) + '.html'
        response = requests.get(com_detail_url, headers=headers)
        # Random pause between detail requests.
        time.sleep(random.randint(3, 7))
        try:
            sel = etree.HTML(response.text)
        except (etree.LxmlError, ValueError):
            # Unparseable page: keep the row, leave the page fields empty.
            sel = None

        # string() yields the node's whole text; tidy it to one trimmed,
        # non-empty line per row.
        raw_desc = _xpath_text(sel, 'string(//*[@class="content"]/dd[2]/div)')
        miaoshu = '\n'.join(
            line.strip() for line in raw_desc.splitlines() if line.strip()
        )
        xueli = _xpath_text(sel, '//*[@id="content"]/div[2]/div[1]/span[5]/span/text()')
        fuli = _xpath_text(sel, '//*[@id="content"]/div[2]/div[2]/text()')

        chengshi = comp.get('city')
        gongsi = comp.get('companyFullName')
        fabushijian = comp.get('createTime')
        zhiwei = comp.get('positionName')
        gongzi = comp.get('salary')
        item = [zhiwei, gongsi, xueli, fabushijian, gongzi, chengshi, com_detail_url, fuli, miaoshu]
        csv_write(item)
        print('正在抓取：', gongsi)


if __name__ == '__main__':
    # Pool of browser User-Agent strings. Nothing in this script reads it;
    # the request headers below carry a fixed User-Agent.
    UserAgent = [
        'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0',
        'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36',
        'Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
    ]

    # Headers sent with every request; spider() reads this module-level name.
    # No Cookie header is sent. If one is needed, coo_regular() turns a raw
    # cookie string into a dict.
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Content-type': 'application/json;charset=utf-8',
        'Connection': 'keep-alive',
        'Host': 'm.lagou.com',
        'Referer': 'https://m.lagou.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Query-string arguments for the search endpoint, also read by spider()
    # as a module-level name. Only page 1 is requested; to fetch further
    # pages, change 'pageNo' and call spider() again.
    params = dict(city='全国', positionName='经理', pageNo=1, pageSize=15)

    spider('http://m.lagou.com/search.json')
